{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 基于机器学习的pyborker量化交易策略"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<diskcache.core.Cache at 0x1639a5872d0>"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pybroker\n",
    "from pybroker.ext.data import AKShare\n",
    "from pybroker import ExecContext, StrategyConfig, Strategy\n",
    "from pybroker.data import DataSource\n",
    "import matplotlib.pyplot as plt\n",
    "from datetime import datetime\n",
    "import riskfolio as rp\n",
    "import akshare as ak\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import sqlite3\n",
    "import datetime\n",
    "\n",
    "import talib\n",
    "from pybroker.vect import cross\n",
    "\n",
    "#正常显示画图时出现的中文和负号\n",
    "from pylab import mpl\n",
    "\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.model_selection import train_test_split \n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "from sklearn.metrics import r2_score\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.ensemble import GradientBoostingRegressor\n",
    "\n",
    "mpl.rcParams['font.sans-serif']=['SimHei']\n",
    "mpl.rcParams['axes.unicode_minus']=False\n",
    "\n",
    "akshare = AKShare()\n",
    "\n",
    "pybroker.enable_data_source_cache('akshare')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 基于机器学习算法进行下个交易日的股票收益预测\n",
    "\n",
    "在scikit-learn中，`r2_score` 是用于评估回归模型性能的一个重要指标，也被称为**决定系数（Coefficient of Determination）**。它衡量的是模型预测值对目标变量变化的解释程度，取值范围通常为 `(-∞, 1]`。\n",
    "\n",
    "### 基本原理\n",
    "- **R² = 1**：表示模型完全拟合数据，预测值与真实值完全一致。\n",
    "- **R² = 0**：表示模型预测效果等同于简单平均基准线（即预测值恒为目标变量的均值）。\n",
    "- **R² < 0**：表示模型性能比简单平均基准线还差，可能存在过拟合或错误的建模。\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "conn=sqlite3.connect(r'I:\\量化金融\\stock_2018.db')\n",
    "stock_daily1=pd.read_sql(\"select * from stock_daily where 股票代码<'003000.SZ'\",con=conn)\n",
    "stock_daily1[\"交易日期\"]=pd.to_datetime(stock_daily1[\"交易日期\"].astype(str))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['index', '交易日期', '股票代码', '股票简称', '开盘价', '最高价', '最低价', '收盘价', '成交量(手)',\n",
       "       '成交额(千元)', '换手率(%)', '量比', '市盈率(静态)', '市盈率(TTM)', '市盈率(动态)', '市净率',\n",
       "       '市销率', '市销率(TTM)', '股息率(%)', '股息率(TTM)(%)', '总股本(万股)', '流通股本(万股)',\n",
       "       '总市值(万元)', '流通市值(万元)'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stock_daily1.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "stock_daily1.columns=['index', \"date\",\"symbol\", '股票简称', \"open\",\"high\",\"low\",\"close\",\"volume\",\n",
    "       '成交额(千元)', '换手率(%)', '量比', '市盈率(静态)', '市盈率(TTM)', '市盈率(动态)', '市净率',\n",
    "       '市销率', '市销率(TTM)', '股息率(%)', '股息率(TTM)(%)', '总股本(万股)', '流通股本(万股)',\n",
    "       '总市值(万元)', '流通市值(万元)']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def func0(x):\n",
    "    return x.pct_change().shift(-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "stock_daily1[\"return_s1\"]=stock_daily1.groupby(\"symbol\", group_keys=False).close.apply(func0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "xy=stock_daily1[['换手率(%)','市盈率(静态)','总市值(万元)',\"return_s1\"]].dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "xy_x=xy[['换手率(%)','市盈率(静态)','总市值(万元)']].values\n",
    "xy_y=xy[\"return_s1\"].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "x1,x2,y1,y2=train_test_split(xy_x,xy_y,test_size=0.7)#分割数据出训练集与测试集，0.7是两者行数的比例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.13972127,  0.27547332, -0.1416296 ],\n",
       "       [-0.73762917, -0.38246096,  0.17937243],\n",
       "       [-0.16066238, -0.27286238, -0.27403239],\n",
       "       ...,\n",
       "       [ 1.33949352,  0.15112145, -0.29883629],\n",
       "       [-0.24321984, -0.22960699, -0.11603221],\n",
       "       [ 1.68755648,  0.47981947, -0.32925919]])"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "clf = GradientBoostingRegressor()\n",
    "#clf = LinearRegression()\n",
    "clf = clf.fit(x1,y1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: #000;\n",
       "  --sklearn-color-text-muted: #666;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-1 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-1 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-1 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: flex;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "  align-items: start;\n",
       "  justify-content: space-between;\n",
       "  gap: 0.5em;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 label.sk-toggleable__label .caption {\n",
       "  font-size: 0.6rem;\n",
       "  font-weight: lighter;\n",
       "  color: var(--sklearn-color-text-muted);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-1 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-1 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-1 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-1 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 0.5em;\n",
       "  text-align: center;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-1 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GradientBoostingRegressor()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>GradientBoostingRegressor</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.6/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html\">?<span>Documentation for GradientBoostingRegressor</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></div></label><div class=\"sk-toggleable__content fitted\"><pre>GradientBoostingRegressor()</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "GradientBoostingRegressor()"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3.79243795e-04, 1.83752795e-03, 8.82648021e-05, ...,\n",
       "       3.52756560e-04, 1.78506119e-04, 1.39414674e-03])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf.predict(x2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0020314384798882923"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r2_score(y2,clf.predict(x2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.019961936150356283"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mean_absolute_error(y2,clf.predict(x2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## pybroker中的机器学习算法训练\n",
    "\n",
    "在 `pybroker` 中，`bootstrap_sample_size=100` 是通过 `StrategyConfig` 类设置的参数，用于控制自举法（Bootstrapping）的样本量。自举法是一种统计重采样技术，用于估计交易策略性能指标的分布和置信区间，从而更可靠地评估策略的稳定性和风险。\n",
    "\n",
    "\n",
    " **自举法的核心思想**\n",
    "传统回测通常只基于单一历史路径评估策略，但市场存在随机性。自举法通过以下步骤模拟这种随机性：\n",
    "1. **有放回抽样**：从实际交易结果（如每日收益）中随机抽取样本，允许重复抽取。\n",
    "2. **重建分布**：每次抽样后，计算策略的性能指标（如收益率、夏普比率等）。\n",
    "3. **多次迭代**：重复抽样和计算过程，生成大量的性能指标样本，形成分布。\n",
    "4. **统计推断**：基于分布计算置信区间、标准差等，评估策略的可靠性。\n",
    "\n",
    "\n",
    " **`bootstrap_sample_size=100` 的具体含义**\n",
    "这意味着在评估策略时：\n",
    "- **每次窗口测试**（如前向分析中的每个时间窗口），`pybroker` 会执行 **100 次自举抽样**。\n",
    "- **每次抽样**会从实际交易结果中随机抽取相同数量的样本点（有放回），并计算一组新的性能指标。\n",
    "- **最终结果**：基于这 100 组指标的分布，计算置信区间（如 95% 置信区间），量化策略的不确定性。\n",
    "\n",
    "例如，若夏普比率的自举分布为 `1.2 ± 0.3`（95% 置信区间），则表明策略的真实夏普比率有 95% 的概率在 0.9 到 1.5 之间，帮助你更全面地理解策略的风险。\n",
    "\n",
    "\n",
    " **为什么需要自举法？**\n",
    "1. **传统指标的局限性**：单次回测的夏普比率或收益率可能受特定历史路径的影响，无法反映策略的真实风险。\n",
    "2. **捕捉随机性**：自举法通过模拟不同的市场路径，揭示策略在各种场景下的表现。\n",
    "3. **统计显著性**：通过置信区间判断策略的盈利是否偶然，避免过度自信。\n",
    "\n",
    "\n",
    " **参数选择建议**\n",
    "- **样本量越大**：结果越接近真实分布，但计算成本越高。\n",
    "- **常见取值**：100-1000 之间。`100` 是一个平衡计算效率和准确性的常见选择。\n",
    "- **实际应用**：若需要更精确的置信区间，可增加到 500 或 1000，但计算时间会相应延长。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train_slr0(symbol, train_data, test_data):\n",
    "    x1=train_data[['close','high','low','volume']]\n",
    "    y1=train_data['return_s1']\n",
    "    #model = DecisionTreeRegressor()\n",
    "    model = GradientBoostingRegressor()\n",
    "    #model = LinearRegression()\n",
    "    model.fit(x1, y1)\n",
    "\n",
    "    x2=test_data[['close','high','low','volume']]\n",
    "    y2=test_data['return_s1']\n",
    "    y_pred = model.predict(x2)\n",
    "    r2 = r2_score(y2, y_pred)\n",
    "    print(symbol, f'R^2={r2}')\n",
    "\n",
    "    return model,[\"close\",\"high\",\"low\",\"volume\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_slr = pybroker.model('slr', train_slr0)\n",
    "pyb_data_pe=stock_daily1[[\"date\",\"symbol\",\"open\",\"high\",\"low\",\"close\",\"volume\",\"return_s1\"]].dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "pybroker.register_columns('return_s1')\n",
    "config = StrategyConfig(bootstrap_sample_size=100)\n",
    "strategy = Strategy(pyb_data_pe, '2018-03-19', '2021-02-15', config)\n",
    "strategy.add_execution(None, ['000001.SZ','002594.SZ'], models=model_slr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Backtesting: 2018-03-19 00:00:00 to 2021-02-15 00:00:00\n",
      "\n",
      "Train split: 2018-03-20 00:00:00 to 2019-08-28 00:00:00\n",
      "000001.SZ R^2=-0.6884334867922908\n",
      "002594.SZ R^2=-0.2150875731114421\n",
      "Finished training models: 0:00:00 \n",
      "\n",
      "Finished backtest: 0:00:00\n"
     ]
    }
   ],
   "source": [
    "result = strategy.backtest(train_size=0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## pybroker中的算法回测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Backtesting: 2018-03-19 00:00:00 to 2022-02-15 00:00:00\n",
      "\n",
      "Train split: 2018-03-19 00:00:00 to 2020-03-02 00:00:00\n",
      "000001.SZ R^2=-0.26581129990175056\n",
      "002594.SZ R^2=-0.9508445974775328\n",
      "Finished training models: 0:00:00 \n",
      "\n",
      "Test split: 2020-03-03 00:00:00 to 2022-02-15 00:00:00\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0% (0 of 475) |                        | Elapsed Time: 0:00:00 ETA:  --:--:--\n",
      " 10% (51 of 475) |##                     | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      " 29% (141 of 475) |######                | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      " 48% (231 of 475) |##########            | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      " 69% (331 of 475) |###############       | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      " 90% (431 of 475) |###################   | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      "100% (475 of 475) |######################| Elapsed Time: 0:00:00 Time:  0:00:00\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Finished backtest: 0:00:05\n"
     ]
    }
   ],
   "source": [
    "def hold_long(ctx):\n",
    "    if not ctx.long_pos():\n",
    "        # Buy if the next bar is predicted to have a positive return:\n",
    "        if ctx.preds('slr')[-1] > 0.:\n",
    "            ctx.buy_shares = ctx.calc_target_shares(0.5)\n",
    "    else:\n",
    "        # Sell if the next bar is predicted to have a negative return:\n",
    "        if ctx.preds('slr')[-1] < 0.:\n",
    "            ctx.sell_shares = ctx.calc_target_shares(0.5)\n",
    "\n",
    "model_slr = pybroker.model('slr', train_slr0)\n",
    "\n",
    "pybroker.register_columns('return_s1')\n",
    "config = StrategyConfig(bootstrap_sample_size=100)\n",
    "strategy = Strategy(pyb_data_pe, '2018-03-19', '2022-02-15', config)\n",
    "strategy.add_execution(hold_long, ['000001.SZ','002594.SZ'], models=model_slr)\n",
    "\n",
    "result = strategy.backtest(train_size=0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>type</th>\n",
       "      <th>symbol</th>\n",
       "      <th>entry_date</th>\n",
       "      <th>exit_date</th>\n",
       "      <th>entry</th>\n",
       "      <th>exit</th>\n",
       "      <th>shares</th>\n",
       "      <th>pnl</th>\n",
       "      <th>return_pct</th>\n",
       "      <th>agg_pnl</th>\n",
       "      <th>bars</th>\n",
       "      <th>pnl_per_bar</th>\n",
       "      <th>stop</th>\n",
       "      <th>mae</th>\n",
       "      <th>mfe</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>long</td>\n",
       "      <td>000001.SZ</td>\n",
       "      <td>2020-03-10</td>\n",
       "      <td>2020-03-11</td>\n",
       "      <td>1595.50</td>\n",
       "      <td>1610.24</td>\n",
       "      <td>31</td>\n",
       "      <td>456.94</td>\n",
       "      <td>0.92</td>\n",
       "      <td>456.94</td>\n",
       "      <td>1</td>\n",
       "      <td>456.94</td>\n",
       "      <td>None</td>\n",
       "      <td>-25.65</td>\n",
       "      <td>25.66</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>long</td>\n",
       "      <td>000001.SZ</td>\n",
       "      <td>2020-03-16</td>\n",
       "      <td>2020-03-17</td>\n",
       "      <td>1539.83</td>\n",
       "      <td>1479.24</td>\n",
       "      <td>32</td>\n",
       "      <td>-1938.88</td>\n",
       "      <td>-3.93</td>\n",
       "      <td>-1481.94</td>\n",
       "      <td>1</td>\n",
       "      <td>-1938.88</td>\n",
       "      <td>None</td>\n",
       "      <td>-60.59</td>\n",
       "      <td>38.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>long</td>\n",
       "      <td>002594.SZ</td>\n",
       "      <td>2020-03-13</td>\n",
       "      <td>2020-03-18</td>\n",
       "      <td>55.46</td>\n",
       "      <td>54.36</td>\n",
       "      <td>864</td>\n",
       "      <td>-950.40</td>\n",
       "      <td>-1.98</td>\n",
       "      <td>-2432.34</td>\n",
       "      <td>3</td>\n",
       "      <td>-316.80</td>\n",
       "      <td>None</td>\n",
       "      <td>-2.59</td>\n",
       "      <td>2.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>short</td>\n",
       "      <td>002594.SZ</td>\n",
       "      <td>2020-03-18</td>\n",
       "      <td>2020-03-24</td>\n",
       "      <td>54.36</td>\n",
       "      <td>49.56</td>\n",
       "      <td>36</td>\n",
       "      <td>172.80</td>\n",
       "      <td>9.69</td>\n",
       "      <td>-2259.54</td>\n",
       "      <td>4</td>\n",
       "      <td>43.20</td>\n",
       "      <td>None</td>\n",
       "      <td>-1.35</td>\n",
       "      <td>5.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>long</td>\n",
       "      <td>000001.SZ</td>\n",
       "      <td>2020-03-18</td>\n",
       "      <td>2020-03-26</td>\n",
       "      <td>1430.11</td>\n",
       "      <td>1422.47</td>\n",
       "      <td>33</td>\n",
       "      <td>-252.12</td>\n",
       "      <td>-0.53</td>\n",
       "      <td>-2511.66</td>\n",
       "      <td>6</td>\n",
       "      <td>-42.02</td>\n",
       "      <td>None</td>\n",
       "      <td>-129.91</td>\n",
       "      <td>49.13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>169</th>\n",
       "      <td>long</td>\n",
       "      <td>000001.SZ</td>\n",
       "      <td>2022-01-17</td>\n",
       "      <td>2022-01-18</td>\n",
       "      <td>1824.33</td>\n",
       "      <td>1829.92</td>\n",
       "      <td>12</td>\n",
       "      <td>67.08</td>\n",
       "      <td>0.31</td>\n",
       "      <td>-39150.17</td>\n",
       "      <td>1</td>\n",
       "      <td>67.08</td>\n",
       "      <td>None</td>\n",
       "      <td>-20.15</td>\n",
       "      <td>20.14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>170</th>\n",
       "      <td>short</td>\n",
       "      <td>000001.SZ</td>\n",
       "      <td>2022-01-18</td>\n",
       "      <td>2022-01-21</td>\n",
       "      <td>1829.92</td>\n",
       "      <td>1945.76</td>\n",
       "      <td>4</td>\n",
       "      <td>-463.36</td>\n",
       "      <td>-5.95</td>\n",
       "      <td>-39613.53</td>\n",
       "      <td>3</td>\n",
       "      <td>-154.45</td>\n",
       "      <td>None</td>\n",
       "      <td>-124.24</td>\n",
       "      <td>21.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>171</th>\n",
       "      <td>long</td>\n",
       "      <td>000001.SZ</td>\n",
       "      <td>2022-01-21</td>\n",
       "      <td>2022-01-24</td>\n",
       "      <td>1945.76</td>\n",
       "      <td>1922.82</td>\n",
       "      <td>11</td>\n",
       "      <td>-252.34</td>\n",
       "      <td>-1.18</td>\n",
       "      <td>-39865.87</td>\n",
       "      <td>1</td>\n",
       "      <td>-252.34</td>\n",
       "      <td>None</td>\n",
       "      <td>-22.94</td>\n",
       "      <td>19.59</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>172</th>\n",
       "      <td>short</td>\n",
       "      <td>000001.SZ</td>\n",
       "      <td>2022-01-24</td>\n",
       "      <td>2022-02-07</td>\n",
       "      <td>1922.82</td>\n",
       "      <td>1807.54</td>\n",
       "      <td>4</td>\n",
       "      <td>461.12</td>\n",
       "      <td>6.38</td>\n",
       "      <td>-39404.75</td>\n",
       "      <td>5</td>\n",
       "      <td>92.22</td>\n",
       "      <td>None</td>\n",
       "      <td>-22.38</td>\n",
       "      <td>152.21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>173</th>\n",
       "      <td>long</td>\n",
       "      <td>000001.SZ</td>\n",
       "      <td>2022-02-07</td>\n",
       "      <td>2022-02-09</td>\n",
       "      <td>1807.54</td>\n",
       "      <td>1886.45</td>\n",
       "      <td>12</td>\n",
       "      <td>946.92</td>\n",
       "      <td>4.37</td>\n",
       "      <td>-38457.83</td>\n",
       "      <td>2</td>\n",
       "      <td>473.46</td>\n",
       "      <td>None</td>\n",
       "      <td>-29.10</td>\n",
       "      <td>91.78</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>173 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      type     symbol entry_date  exit_date    entry     exit  shares  \\\n",
       "id                                                                      \n",
       "1     long  000001.SZ 2020-03-10 2020-03-11  1595.50  1610.24      31   \n",
       "2     long  000001.SZ 2020-03-16 2020-03-17  1539.83  1479.24      32   \n",
       "3     long  002594.SZ 2020-03-13 2020-03-18    55.46    54.36     864   \n",
       "4    short  002594.SZ 2020-03-18 2020-03-24    54.36    49.56      36   \n",
       "5     long  000001.SZ 2020-03-18 2020-03-26  1430.11  1422.47      33   \n",
       "..     ...        ...        ...        ...      ...      ...     ...   \n",
       "169   long  000001.SZ 2022-01-17 2022-01-18  1824.33  1829.92      12   \n",
       "170  short  000001.SZ 2022-01-18 2022-01-21  1829.92  1945.76       4   \n",
       "171   long  000001.SZ 2022-01-21 2022-01-24  1945.76  1922.82      11   \n",
       "172  short  000001.SZ 2022-01-24 2022-02-07  1922.82  1807.54       4   \n",
       "173   long  000001.SZ 2022-02-07 2022-02-09  1807.54  1886.45      12   \n",
       "\n",
       "         pnl  return_pct   agg_pnl  bars  pnl_per_bar  stop     mae     mfe  \n",
       "id                                                                           \n",
       "1     456.94        0.92    456.94     1       456.94  None  -25.65   25.66  \n",
       "2   -1938.88       -3.93  -1481.94     1     -1938.88  None  -60.59   38.75  \n",
       "3    -950.40       -1.98  -2432.34     3      -316.80  None   -2.59    2.96  \n",
       "4     172.80        9.69  -2259.54     4        43.20  None   -1.35    5.56  \n",
       "5    -252.12       -0.53  -2511.66     6       -42.02  None -129.91   49.13  \n",
       "..       ...         ...       ...   ...          ...   ...     ...     ...  \n",
       "169    67.08        0.31 -39150.17     1        67.08  None  -20.15   20.14  \n",
       "170  -463.36       -5.95 -39613.53     3      -154.45  None -124.24   21.26  \n",
       "171  -252.34       -1.18 -39865.87     1      -252.34  None  -22.94   19.59  \n",
       "172   461.12        6.38 -39404.75     5        92.22  None  -22.38  152.21  \n",
       "173   946.92        4.37 -38457.83     2       473.46  None  -29.10   91.78  \n",
       "\n",
       "[173 rows x 15 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the trades table of the most recently computed `result`.\n",
    "result.trades"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>trade_count</td>\n",
       "      <td>173.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>initial_market_value</td>\n",
       "      <td>100000.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>end_market_value</td>\n",
       "      <td>62344.77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>total_pnl</td>\n",
       "      <td>-38457.83</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>unrealized_pnl</td>\n",
       "      <td>802.60</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   name      value\n",
       "0           trade_count     173.00\n",
       "1  initial_market_value  100000.00\n",
       "2      end_market_value   62344.77\n",
       "3             total_pnl  -38457.83\n",
       "4        unrealized_pnl     802.60"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first five rows of the metrics table of the current `result`.\n",
    "result.metrics_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## pybroker中的算法前向测试\n",
    "\n",
    "https://www.pybroker.com/zh-cn/latest/notebooks/6.%20Training%20a%20Model.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Backtesting: 2018-03-19 00:00:00 to 2022-02-15 00:00:00\n",
      "\n",
      "Train split: 2018-03-21 00:00:00 to 2019-03-12 00:00:00\n",
      "000001.SZ R^2=-0.1357062022922011\n",
      "002594.SZ R^2=-0.2507676498010858\n",
      "Finished training models: 0:00:00 \n",
      "\n",
      "Test split: 2019-03-13 00:00:00 to 2020-03-03 00:00:00\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0% (0 of 237) |                        | Elapsed Time: 0:00:00 ETA:  --:--:--\n",
      " 25% (61 of 237) |#####                  | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      " 55% (131 of 237) |############          | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      " 84% (201 of 237) |##################    | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      "100% (237 of 237) |######################| Elapsed Time: 0:00:00 Time:  0:00:00\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Train split: 2019-03-13 00:00:00 to 2020-03-03 00:00:00\n",
      "000001.SZ R^2=-0.33170840978823257\n",
      "002594.SZ R^2=-0.565988750552967\n",
      "Finished training models: 0:00:00 \n",
      "\n",
      "Test split: 2020-03-04 00:00:00 to 2021-02-23 00:00:00\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0% (0 of 237) |                        | Elapsed Time: 0:00:00 ETA:  --:--:--\n",
      " 21% (51 of 237) |####                   | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      " 55% (131 of 237) |############          | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      " 93% (221 of 237) |####################  | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      "100% (237 of 237) |######################| Elapsed Time: 0:00:00 Time:  0:00:00\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Train split: 2020-03-04 00:00:00 to 2021-02-23 00:00:00\n",
      "000001.SZ R^2=-0.42795351524708747\n",
      "002594.SZ R^2=-0.41951301434582167\n",
      "Finished training models: 0:00:00 \n",
      "\n",
      "Test split: 2021-02-24 00:00:00 to 2022-02-15 00:00:00\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0% (0 of 237) |                        | Elapsed Time: 0:00:00 ETA:  --:--:--\n",
      " 17% (41 of 237) |###                    | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      " 42% (101 of 237) |#########             | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      " 67% (161 of 237) |##############        | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      " 89% (211 of 237) |###################   | Elapsed Time: 0:00:00 ETA:   0:00:00\n",
      "100% (237 of 237) |######################| Elapsed Time: 0:00:00 Time:  0:00:00\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Calculating bootstrap metrics: sample_size=100, samples=10000...\n",
      "Calculated bootstrap metrics: 0:00:06 \n",
      "\n",
      "Finished backtest: 0:00:08\n"
     ]
    }
   ],
   "source": [
    "# Drop the executions already registered on `strategy`, then add hold_long\n",
    "# again so it is registered exactly once for this run.\n",
    "strategy.clear_executions()\n",
    "strategy.add_execution(hold_long, ['000001.SZ','002594.SZ'], models=model_slr)\n",
    "\n",
    "# Walk-forward analysis over 3 windows; this rebinds `result`, replacing the\n",
    "# earlier single-backtest result.\n",
    "#   warmup=20      -> bars to skip before the executions start running\n",
    "#   train_size=0.5 -> fraction of each window used for training\n",
    "#   lookahead=1    -> the model targets the next bar; this keeps training data\n",
    "#                     from leaking across the train/test boundary\n",
    "#   calc_bootstrap -> also compute bootstrap metrics\n",
    "result = strategy.walkforward(\n",
    "    warmup=20,\n",
    "    windows=3,\n",
    "    train_size=0.5,\n",
    "    lookahead=1,\n",
    "    calc_bootstrap=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>type</th>\n",
       "      <th>symbol</th>\n",
       "      <th>date</th>\n",
       "      <th>shares</th>\n",
       "      <th>limit_price</th>\n",
       "      <th>fill_price</th>\n",
       "      <th>fees</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>buy</td>\n",
       "      <td>002594.SZ</td>\n",
       "      <td>2019-04-12</td>\n",
       "      <td>895</td>\n",
       "      <td>NaN</td>\n",
       "      <td>55.08</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>buy</td>\n",
       "      <td>000001.SZ</td>\n",
       "      <td>2019-04-15</td>\n",
       "      <td>33</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1498.93</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sell</td>\n",
       "      <td>000001.SZ</td>\n",
       "      <td>2019-04-16</td>\n",
       "      <td>33</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1519.46</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>sell</td>\n",
       "      <td>002594.SZ</td>\n",
       "      <td>2019-04-16</td>\n",
       "      <td>907</td>\n",
       "      <td>NaN</td>\n",
       "      <td>56.06</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>buy</td>\n",
       "      <td>000001.SZ</td>\n",
       "      <td>2019-04-18</td>\n",
       "      <td>32</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1538.36</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>424</th>\n",
       "      <td>sell</td>\n",
       "      <td>002594.SZ</td>\n",
       "      <td>2022-01-26</td>\n",
       "      <td>210</td>\n",
       "      <td>NaN</td>\n",
       "      <td>251.78</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>425</th>\n",
       "      <td>buy</td>\n",
       "      <td>002594.SZ</td>\n",
       "      <td>2022-01-27</td>\n",
       "      <td>204</td>\n",
       "      <td>NaN</td>\n",
       "      <td>250.05</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>426</th>\n",
       "      <td>sell</td>\n",
       "      <td>002594.SZ</td>\n",
       "      <td>2022-02-09</td>\n",
       "      <td>212</td>\n",
       "      <td>NaN</td>\n",
       "      <td>245.53</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>427</th>\n",
       "      <td>buy</td>\n",
       "      <td>002594.SZ</td>\n",
       "      <td>2022-02-10</td>\n",
       "      <td>207</td>\n",
       "      <td>NaN</td>\n",
       "      <td>247.91</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>428</th>\n",
       "      <td>sell</td>\n",
       "      <td>002594.SZ</td>\n",
       "      <td>2022-02-14</td>\n",
       "      <td>216</td>\n",
       "      <td>NaN</td>\n",
       "      <td>234.60</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>428 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     type     symbol       date  shares  limit_price  fill_price  fees\n",
       "id                                                                    \n",
       "1     buy  002594.SZ 2019-04-12     895          NaN       55.08   0.0\n",
       "2     buy  000001.SZ 2019-04-15      33          NaN     1498.93   0.0\n",
       "3    sell  000001.SZ 2019-04-16      33          NaN     1519.46   0.0\n",
       "4    sell  002594.SZ 2019-04-16     907          NaN       56.06   0.0\n",
       "5     buy  000001.SZ 2019-04-18      32          NaN     1538.36   0.0\n",
       "..    ...        ...        ...     ...          ...         ...   ...\n",
       "424  sell  002594.SZ 2022-01-26     210          NaN      251.78   0.0\n",
       "425   buy  002594.SZ 2022-01-27     204          NaN      250.05   0.0\n",
       "426  sell  002594.SZ 2022-02-09     212          NaN      245.53   0.0\n",
       "427   buy  002594.SZ 2022-02-10     207          NaN      247.91   0.0\n",
       "428  sell  002594.SZ 2022-02-14     216          NaN      234.60   0.0\n",
       "\n",
       "[428 rows x 7 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the orders table of the current `result` (the walk-forward run when\n",
    "# the notebook is executed top to bottom).\n",
    "result.orders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>trade_count</td>\n",
       "      <td>413.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>initial_market_value</td>\n",
       "      <td>100000.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>end_market_value</td>\n",
       "      <td>130676.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>total_pnl</td>\n",
       "      <td>34315.21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>unrealized_pnl</td>\n",
       "      <td>-3638.61</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   name      value\n",
       "0           trade_count     413.00\n",
       "1  initial_market_value  100000.00\n",
       "2      end_market_value  130676.60\n",
       "3             total_pnl   34315.21\n",
       "4        unrealized_pnl   -3638.61"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first five rows of the metrics table of the current `result`.\n",
    "# NOTE(review): the saved output of this cell has execution count 68 while\n",
    "# the preceding cell has 19; re-run the notebook top to bottom to confirm\n",
    "# these metrics belong to the walk-forward run.\n",
    "result.metrics_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "quant",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
